- Sun 09 February 2020
- Data Science
- Michael Lehotay
- #data-analysis, #visualization, #SQL
I recently scraped the PvP leaderboards from the World of Warcraft website and wrote them out to a SQLite database. Let's plot some charts of the data and see if anything interesting turns up.
There are three leaderboards: the 2v2 Arena, the 3v3 Arena, and the 10x10 Battleground. Each leaderboard lists the top 1000 players by rating. (I think these are Elo ratings.) Anyway, I saved the leaderboards to the SQL database as three separate tables. I also scraped a minimal amount of data from the profile pages of each of the characters on the leaderboards and saved the profile data to a fourth table.
import sqlite3
import altair as alt
import pandas as pd
query = '''
SELECT *
FROM sqlite_master
WHERE type='table'
'''
con = sqlite3.connect('data/wow.db')
pd.read_sql(query, con)
| type | name | tbl_name | rootpage | sql | |
|---|---|---|---|---|---|
| 0 | table | arena_2v2 | arena_2v2 | 2 | CREATE TABLE "arena_2v2" (\n"rank" INTEGER,\n ... |
| 1 | table | arena_3v3 | arena_3v3 | 32 | CREATE TABLE "arena_3v3" (\n"rank" INTEGER,\n ... |
| 2 | table | battlegrounds | battlegrounds | 62 | CREATE TABLE "battlegrounds" (\n"rank" INTEGER... |
| 3 | table | players | players | 93 | CREATE TABLE "players" (\n"name" TEXT,\n "tit... |
query = '''
SELECT name, rank, rating, wins, losses, achievement, ilvl
FROM battlegrounds
LEFT JOIN players
ON battlegrounds.url = players.URL
'''
con = sqlite3.connect('data/wow.db')
df = pd.read_sql(query, con)
con.close()
df
| name | rank | rating | wins | losses | achievement | ilvl | |
|---|---|---|---|---|---|---|---|
| 0 | Crdefender | 1 | 2163 | 81 | 4 | 18265.0 | 456.0 |
| 1 | Lifeswaplol | 1 | 2163 | 80 | 3 | 8945.0 | 462.0 |
| 2 | Wolf | 3 | 2153 | 77 | 4 | 18155.0 | 463.0 |
| 3 | Intricate | 4 | 2135 | 72 | 4 | 10530.0 | 455.0 |
| 4 | Jøkes | 5 | 2125 | 69 | 2 | 15550.0 | 459.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 995 | Evileretta | 995 | 1443 | 9 | 14 | 12470.0 | 452.0 |
| 996 | Idoless | 995 | 1443 | 10 | 12 | 13985.0 | 447.0 |
| 997 | Vyaz | 995 | 1443 | 8 | 3 | 18820.0 | 457.0 |
| 998 | Xwarlord | 995 | 1443 | 12 | 14 | 10505.0 | 447.0 |
| 999 | Thedarklord | 1000 | 1442 | 15 | 12 | 16195.0 | 437.0 |
1000 rows × 7 columns
df['num_matches'] = df['wins'] + df['losses']
df['win_ratio'] = df['wins'] / df['num_matches']
alt.Chart(df).mark_point().encode(x='rank', y='rating')
alt.Chart(df).mark_point().encode(x='rank', y='ilvl')
alt.Chart(df).mark_point().encode(alt.X('rating:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(zero=False)))
alt.Chart(df).mark_point().encode(x='rank', y='achievement')
alt.Chart(df).mark_point().encode(x='rank', y='win_ratio')
alt.Chart(df).mark_point().encode(alt.X('win_ratio:Q', scale=alt.Scale(zero=False)), y='num_matches')
alt.Chart(df).mark_point().encode(alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)), x='num_matches')
alt.Chart(df).mark_point().encode(x='num_matches', y='wins')
alt.Chart(df).mark_bar().encode(alt.X("rating:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("rank:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("ilvl:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("achievement:Q", bin=True), y='count()')
alt.Chart(df).mark_circle().encode(
alt.X(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)),
alt.Y(alt.repeat("row"), type='quantitative', scale=alt.Scale(zero=False))
).properties(
width=100,
height=100
).repeat(
row=['rank', 'rating', 'win_ratio', 'ilvl', 'achievement'],
column=['rank', 'rating', 'win_ratio', 'ilvl', 'achievement']
)
query = '''
SELECT *, '2v2' as board
FROM arena_2v2
UNION ALL
SELECT *, '3v3' as board
FROM arena_3v3
UNION ALL
SELECT *, 'battlegrounds' as board
FROM battlegrounds
'''
con = sqlite3.connect('data/wow.db')
df_boards = pd.read_sql(query, con)
con.close()
df_boards
| rank | rating | player | class | faction | realm | wins | losses | url | board | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2611 | Dinoe | Druid | ALLIANCE | Sargeras | 209 | 25 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 1 | 2 | 2591 | Thugonomiczz | Warlock | ALLIANCE | Stormrage | 95 | 9 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 2 | 3 | 2539 | Drãke | Monk | ALLIANCE | Stormrage | 118 | 36 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 3 | 4 | 2514 | Niarb | Paladin | ALLIANCE | Laughing Skull | 98 | 42 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 4 | 5 | 2499 | Kubyzy | Druid | ALLIANCE | Kel'Thuzad | 115 | 21 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2994 | 995 | 1443 | Evileretta | Warlock | ALLIANCE | Vashj | 9 | 14 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2995 | 995 | 1443 | Idoless | Priest | HORDE | Magtheridon | 10 | 12 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2996 | 995 | 1443 | Vyaz | Rogue | ALLIANCE | Stormrage | 8 | 3 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2997 | 995 | 1443 | Xwarlord | Priest | HORDE | Firetree | 12 | 14 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2998 | 1000 | 1442 | Thedarklord | Mage | ALLIANCE | Sargeras | 15 | 12 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
2999 rows × 10 columns
alt.Chart(df_boards).mark_point().encode(x='rank', y='rating', color='board')
query = '''
SELECT board, rank, rating, name, title, realm, class, details,
faction, wins, losses, achievement, ilvl, players.url
FROM (
SELECT *, '2v2 arenas' as board
FROM arena_2v2
UNION
SELECT *, '3v3 arenas' as board
FROM arena_3v3
UNION
SELECT *, 'battlegrounds' as board
FROM battlegrounds
) leaderboards
JOIN players
WHERE players.url = leaderboards.url
'''
con = sqlite3.connect('data/wow.db')
df_boards = pd.read_sql(query, con)
con.close()
df_boards
| board | rank | rating | name | title | realm | class | details | faction | wins | losses | achievement | ilvl | url | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2v2 arenas | 1 | 2611 | Dinoe | Notorious Gladiator | Sargeras | Druid | 120 Night Elf Restoration Druid | ALLIANCE | 209 | 25 | 22180 | 465 | https://worldofwarcraft.com/en-us/character/us... |
| 1 | 3v3 arenas | 113 | 2155 | Dinoe | Notorious Gladiator | Sargeras | Druid | 120 Night Elf Restoration Druid | ALLIANCE | 141 | 50 | 22180 | 465 | https://worldofwarcraft.com/en-us/character/us... |
| 2 | 2v2 arenas | 2 | 2591 | Thugonomiczz | Wrathful Gladiator | Stormrage | Warlock | 120 Dwarf Destruction Warlock | ALLIANCE | 95 | 9 | 15320 | 465 | https://worldofwarcraft.com/en-us/character/us... |
| 3 | 2v2 arenas | 3 | 2539 | Drãke | Dread Gladiator | Stormrage | Monk | 120 Human Windwalker Monk | ALLIANCE | 118 | 36 | 10075 | 469 | https://worldofwarcraft.com/en-us/character/us... |
| 4 | 3v3 arenas | 12 | 2395 | Drãke | Dread Gladiator | Stormrage | Monk | 120 Human Windwalker Monk | ALLIANCE | 202 | 83 | 10075 | 469 | https://worldofwarcraft.com/en-us/character/us... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2965 | battlegrounds | 995 | 1443 | Xwarlord | High Warlord | Firetree | Priest | 120 Blood Elf Holy Priest | HORDE | 12 | 14 | 10505 | 447 | https://worldofwarcraft.com/en-us/character/us... |
| 2966 | battlegrounds | 995 | 1443 | Aviana | the Insane | Doomhammer | Priest | 120 Night Elf Holy Priest | ALLIANCE | 12 | 14 | 28380 | 457 | https://worldofwarcraft.com/en-us/character/us... |
| 2967 | battlegrounds | 995 | 1443 | Idoless | Warlord | Magtheridon | Priest | 120 Blood Elf Discipline Priest | HORDE | 10 | 12 | 13985 | 447 | https://worldofwarcraft.com/en-us/character/us... |
| 2968 | battlegrounds | 995 | 1443 | Evileretta | Justicar | Vashj | Warlock | 120 Human Destruction Warlock | ALLIANCE | 9 | 14 | 12470 | 452 | https://worldofwarcraft.com/en-us/character/us... |
| 2969 | battlegrounds | 1000 | 1442 | Thedarklord | Vanquisher | Sargeras | Mage | 120 Human Frost Mage | ALLIANCE | 15 | 12 | 16195 | 437 | https://worldofwarcraft.com/en-us/character/us... |
2970 rows × 14 columns
alt.Chart(df_boards).mark_point().encode(x='rank', y='rating', color='board')
alt.Chart(df_boards).mark_point().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('rating:Q', scale=alt.Scale(zero=False)),
color='board')
alt.Chart(df_boards).mark_point(opacity=0.75).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(zero=False)),
color='board')
alt.Chart(df_boards).mark_point(opacity=0.75, clip=True).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(domain=(420, 480))), color='board')
alt.Chart(df_boards).mark_point(opacity=0.75, clip=True).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(domain=(420, 480))),
color='class')
df_boards['num_matches'] = df_boards['wins'] + df_boards['losses']
df_boards['win_ratio'] = df_boards['wins'] / df_boards['num_matches']
alt.Chart(df_boards).mark_circle().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)), color='class')
alt.Chart(df_boards).mark_circle().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)),
color='faction')
alt.Chart(df_boards).mark_bar().encode(x='class', y='count()')
alt.Chart(df_boards).mark_bar().encode(
x='class',
y='count()',
color='faction'
)
alt.Chart(df_boards).mark_bar().encode(
x='faction',
y='win_ratio',
)
bar = alt.Chart(df_boards).mark_bar().encode(
x='class:O',
y='mean(rating):Q'
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
y='mean(rating):Q'
)
(bar + rule).properties(width=600)
bar = alt.Chart(df_boards).mark_bar().encode(
alt.Y('mean(rating)', scale=alt.Scale(zero=False)),
x='class'
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
y='mean(rating)'
)
(bar + rule).properties(width=500)
df_boards['rank2'] = 1000 - df_boards['rank']
alt.Chart(df_boards).mark_bar().encode(
alt.Y('mean(rank2)', scale=alt.Scale(zero=False)),
x='class',
column='board'
)
alt.Chart(df_boards).mark_bar().encode(
alt.X('class'),
alt.Y('mean(rank2)', scale=alt.Scale(zero=False)),
alt.Color('board'),
alt.Column('board')
)
bar = alt.Chart(df_boards).mark_bar().encode(
alt.X('mean(rank2)', scale=alt.Scale(zero=False)),
alt.Y('details', sort='-x'),
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
x='mean(rank2)'
)
(bar + rule)
alt.Chart(df_boards).transform_fold(
['2v2 arenas', '3v3 arenas', 'battlegrounds'],
as_=['Leaderboard', '# Characters']
).mark_area(
opacity=0.5,
interpolate='step'
).encode(
alt.X('rating:Q', bin=alt.Bin(maxbins=100)),
alt.Y('count()', stack=None),
alt.Color('board:N')
)